home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Collection of Tools & Utilities
/
Collection of Tools and Utilities.iso
/
tex
/
gestalt.zip
/
SIMIL_P.ASM
< prev
next >
Wrap
Assembly Source File
|
1988-12-06
|
10KB
|
258 lines
;From the article "Pattern Matching - The Gestalt Approach"
;by John W. Ratcliff and David E. Metzener
;Dr. Dobbs Journal, July 1988
;Rewritten for Turbo Pascal v3.0 inline code and INLINE.COM.
;Now expects normal Pascal strings (first byte is length byte).
;Usage:
;
;FUNCTION Simil(VAR Str1,Str2) : INTEGER;
; VAR result : INTEGER;
; BEGIN
; (*$I simil_p.obj*) (* this code *)
; Simil := result; (*return the function result*)
; END; (* of Simil *)
;
;See TESTSIM.PAS for required global variables.
;
;Extensive remarks removed. See other versions.
; David Kirschbaum
; Toad Hall
; kirsch@braggvax.ARPA
push bp ;save Turbo's BP
push ES ;and ES
cld ;insure forward
les si,>str1[bp] ;ES:SI = str1 addr
mov di,>str2[bp] ;ES:DI = str2 addr
;
xor ax,ax ;get a 0
mov [>score],ax ;zero out SCORE
mov [>stcknum],ax ;init nr of stack entries to 0
ES:
cmp [si],al ;is it a null string?
je StrErr ;can't process null strings
ES:
cmp [di],al ;is it a null string?
jne DoCmp ;Both strs ok, so process them
StrErr:
jmp Exit ;exit routine
;
DoCmp:
xor ah,ah ;clear msb
ES:
mov al,[si] ;get str1 length byte
mov cx,ax ;accum total len in CX
add ax,si ;add in string start
mov bx,ax ;BX points to str1 last char
inc si ;bump SI past str1 length byte
;
xor ah,ah ;clear msb
ES:
mov al,[di] ;str2 length byte
add cx,ax ;accum total len in CX
add ax,di ;add in start
mov bp,ax ;BP points to str2 last char
inc di ;bump DI past str2 length byte
;
mov [>total],cx ;store total string length
;
call PushSt ;push values for
; ; first call to SIMILIARITY
Main:
cmp word [>stcknum],0 ;anything on the stack?
je Done ;no, then all done!
;
call PopSt ;get regs set up for a compare call
call Compare ;do compare for this substring set
or dx,dx ;if nothing in common
; then nothing to push
jz Main ;try another set
;
shl dx,1 ;*2
add [>score],dx ;add into score
mov bp,[>stcknum] ;get nr of entry
; I want to look at
shl bp,1 ;get AX ready
; to access string stacks
DS: ;override SS:bp
mov si,[>ststr1L][bp] ;move L1 into SI or L1
mov bx,[>cL1] ;move CL1 into BX or R1
DS: ;override SS:bp
mov di,[>ststr2L][bp] ;move L2 into DI or L2
mov cx,[>cL2] ;move CL2 into CX temporarily
DS: ;override SS:bp
mov ax,[>ststr1R][bp] ;get old R1 off of stack
mov [>cL1],ax ;place in CL1 temporarily
DS: ;override SS:bp
mov ax,[>ststr2R][bp] ;get old R2 off of stack
mov [>cL2],ax ;save in CL2 temporarily
mov bp,cx ;place CL2 into BP
;
cmp bx,si ;compare CL1 to L1
je Chrght ;if zero, then nothing
; ; on left side str1
cmp bp,di ;compare CL2 to L2
je Chrght ;if zero, then nothing
; ; on left side str2
dec bx ;point to last part
; of left side str1
dec bp ;ditto, str2
cmp bx,si ;only one char to examine?
jne PushIt ;no->we need to examine this
cmp bp,di ; only one char in both?
je Chrght ; nothing to look at
; ; if both only contain 1 char
PushIt:
call PushSt ;push left side on stack
Chrght:
mov si,[>cR1] ;move CR1 into SI or L1
mov bx,[>cL1] ;move R1 into BX or R1
mov di,[>cR2] ;move CR2 into DI or L2
mov bp,[>cL2] ;move R2 into BP or R2
;
cmp si,bx ;compare CR1 to R1
je Main ;If zero, then nothing
; on right side str1
cmp di,bp ;compare CR2 to R2
je Main ;if zero, then nothing
; on right side str2
inc si ;point to last part
; of right side str1
inc di ;ditto, str2
cmp bx,si ;only one char to examine?
jne Push2 ;no-> examine it
cmp bp,di ; only 1 char to examine in both?
je Main ; yes-> get next string off of stack
;
Push2:
call PushSt ;push right side on stack
jmp short Main ;do next level of compares
;
Done:
mov ax,[>score] ;get score into AX for MUL
or ax,ax ;zero score?
jz Donit ;yep, skip this mess
mov cx,100 ; get 100 into CX for MUL
mul cx ; multiply by 100
mov cx,[>total] ; get total chars for divide
div cx ; divide by total
Donit:
jmp Exit ;clean up and exit
;
;subroutines
Compare:
mov [>s2ed],bp ;store end of str2
xor dx,dx ;init MAXCHARS
ForL3:
push di ;save start of str2
ForL4:
push di ;save start of str2
push si ;save start of str1
mov cx,[>s2ed] ;set up for calc
; of length of str1
sub cx,di ;get length of str1 -1
inc cx ;make proper length
push cx ;save str1 starting len
push DS ;save our DS a sec
mov ax,ES
mov DS,ax ;force DS=ES for the compare
repz cmpsb ;compare strings
pop DS ;restore our DS
jz Equal ;if equal, then skip fixes
inc cx ; inc back because CMPS decs
; even if not equal
Equal:
pop ax ;get str1 starting len
sub ax,cx ;get length of common characters
jnz NewMax ;more than 0 chars matched
;
pop si ;get back start of str1
pop di ;ditto str2
;
Reent:
inc di ;do the next char no matter what
Reent2:
cmp di,bp ;are we done with str2?
jle ForL4 ;no, then do next string compare
pop di ;get back start of str2
inc si ;next char in str1 to scan
cmp si,bx ;are we done with str1?
jle ForL3 ;no, then do next string compare
ret ;MAXCHARS is in DX
;
NewMax:
pop si ;get back start of str1
pop di ;and str2
cmp ax,dx ;greater than MAXCHARS?
jg NewMx2 ;yes, update new MAXCHARS
;and pointers
add di,ax ; skip past matching chars
jmp short Reent2 ; reenter main loop
;
NewMx2:
mov [>cL1],si ;put begin of match of str1
mov [>cL2],di ;ditto, str2
mov cx,ax ;save new MAXCHARS
;
sub ax,dx ;get delta for adjustment
; to ends of strings
sub bx,ax ;adjust end of str1
sub bp,ax ; and str2
;
mov dx,cx ;new MAXCHARS
dec cx ;set up for advance
; to last matching char
add di,cx ;bump to last matching char str2
mov [>cR2],di ;put end of match of str2
add cx,si ;bump to last matching char str1
mov [>cR1],cx ;put end of match of str1
jmp short Reent ;reenter inner loop
;
PushSt:
; On Entry:
; BX = R1 (right side of str1)
; DS:SI = L1 (left side of str1)
; ES:Di = L2 (left side of str2)
; BP = R2 (right side of str2)
;
mov cx,bp ;save R2
mov bp,[>stcknum]
shl bp,1 ;*2 for words